

* Start from an empty session and make sure no earlier log is still open
clear all
capture log close

* Write the output of this step to its own log file
log using "${logfiles}step04_matching.log", replace

* Fix the random-number seed so that repeated runs are reproducible
set seed 123456

* ----------------------------------------------------------------------
* Nearest-neighbour matching, run separately for every event year Y.
* For each Y the loop
*   1. loads the workplace-crime defendants and keeps their base-year rows,
*   2. adds potential controls from the base-year FLEED file,
*   3. attaches FLEED characteristics and cumulative crime records for
*      the base year and each of the five preceding years,
*   4. builds the matching covariates and runs teffects nnmatch,
*   5. saves treated units together with their matched controls in
*      allmatches_long_Y.dta.
* ereplace is a user-written command and must be installed.
* ----------------------------------------------------------------------
forvalues Y=2006 (1) 2014 {

	* --- 1. Treated sample ------------------------------------------------
	use "$dataout\DV_workplace_combined_allyears_2006_2014.dta", clear
	gen year_event=baseyr
	gen wp_crime=1

	keep victim_shnro defendant_shnro sykstun year_event time year wp_crime suspect_sex plaintiff_sex occ realAllEarn victim_occ victim_realAllEarn crimecode6 victim_sykstun_lag sykstun_lag

	* Flag defendants whose workplace code changes at time 1, or who lose
	* their workplace code at time 0. The within-group ordering is stated
	* explicitly in parentheses so that the [_n-1] comparison does not
	* depend on a sort issued earlier.
	bysort defendant_shnro year_event (time sykstun): gen defendant_switch=1 if sykstun!=sykstun[_n-1] & sykstun!= "" & time==1
	bysort defendant_shnro year_event (time sykstun): replace defendant_switch=1 if sykstun!=sykstun[_n-1] & sykstun== "" & sykstun[_n-1]!= "" & time==0

	* Spread the flag to every row of the defendant-event
	bysort defendant_shnro year_event: ereplace defendant_switch=max(defendant_switch)
	replace defendant_switch=0 if defendant_switch==.

	rename defendant_shnro shnro
	sort shnro

	* Keep only the base-year observations of the current event year
	keep if year_event==`Y'
	keep if time==0

	* Defendant is in occupation group 1 while the victim is not
	gen manager_sub=(occ==1 & victim_occ!=1)

	* Earnings gap between defendant and victim, split at the median gap
	* observed among cases with plaintiff_sex 2 and suspect_sex 1
	gen gap=realAllEarn-victim_realAllEarn
	sum gap if plaintiff_sex== "2" & suspect_sex=="1", d
	local med = r(p50)

	* Stata treats a missing value as larger than any number, so a missing
	* gap would otherwise be classified as above the median.
	gen manager_sub2= (gap >= `med' & !missing(gap))

	* --- 2. Base year: add potential controls and characteristics --------
	merge m:1 shnro using "D:\ready-made\FLEED_TOTAL\2016\fleed_kokonais_`Y'.dta", keepusing(shnro sykstun sukup syrtun ptoim1 tyotu svatva toimiala ammattikoodi ututku ika tyrtu svatvp)

	* Age restriction. Because missing counts as larger than 68, the first
	* drop also removes rows without an age, that is, rows not found in
	* the base-year FLEED file.
	drop if ika>68
	drop if ika<18

	* Cap the number of rows: beyond row 200000 only treated rows survive
	sort shnro wp_crime
	drop if _n>200000 & wp_crime!=1 & _merge==2
	drop _merge

	* Suffix the base-year values with _0. ammattikoodi is included here
	* because merge keeps the master copy of a variable that already
	* exists; left unrenamed, the year-1 merge below would silently keep
	* the base-year occupation instead of loading the lagged one.
	foreach v in ptoim1 tyotu tyrtu sykstun ammattikoodi {
		rename `v' `v'_0
	}

	* Cumulative crime record in the base year
	merge m:1 shnro using  "W:\workplace\data\crime_records_year_`Y'", keepusing(cum_record)
	keep if _merge==3
	drop _merge
	rename cum_record cum_record_0

	* --- 3. Characteristics in each of the five preceding years ----------
	forvalues lag=1(1)5 {
		local yr=`Y'-`lag'

		* Occupation is only needed for the first lag
		local occvar
		if `lag'==1 local occvar ammattikoodi

		* Every lag keeps matched rows only. The original code omitted
		* this restriction for the third lag alone.
		merge m:1 shnro using "D:\ready-made\FLEED_TOTAL\2016\fleed_kokonais_`yr'.dta", keepusing(shnro `occvar' sykstun ptoim1 tyotu tyrtu)
		keep if _merge==3
		drop _merge
		foreach v in ptoim1 tyotu tyrtu sykstun `occvar' {
			rename `v' `v'_`lag'
		}

		* Cumulative crime record in that year
		merge m:1 shnro using  "W:\workplace\data\crime_records_year_`yr'", keepusing(cum_record)
		keep if _merge==3
		drop _merge
		rename cum_record cum_record_`lag'
	}

	* Controls have no calendar year yet: copy the largest observed value
	ereplace year=max(year)

	* --- 4. Matching covariates -------------------------------------------
	* Demographics
	gen age=ika
	gen gender=(sukup=="1")

	* Education: full code and its first digit; level 2 when not available
	g educ=ututku
	destring educ, replace
	gen level=substr(ututku, 1,1)
	destring level, replace
	replace level=2 if level==.

	* Employment and earnings for the base year and the five lags
	forvalues i=0(1)5 {
		destring ptoim1_`i', replace

		* Employed means activity code 11 together with a workplace code.
		* An empty string is what the switch flag above treats as a
		* missing workplace code; a literal dot is excluded as well, as in
		* the original condition.
		gen employed`i'=(ptoim1_`i'==11 & !inlist(sykstun_`i', "", "."))

		* Missing earnings components count as zero
		replace tyotu_`i' = 0 if missing(tyotu_`i')
		replace tyrtu_`i' = 0 if missing(tyrtu_`i')

		* Sum of labor and entrepreneur earnings
		gen allEarnings_`i' =  tyotu_`i' + tyrtu_`i'

		* NOTE(review): despite the name, no deflator is applied here
		gen realAllEarn`i' = allEarnings_`i'
	}

	* Everything that is not a treated row is a potential control; cap the
	* sample once more
	replace wp_crime=0 if wp_crime==.
	sort shnro wp_crime
	drop if _n>100000 & wp_crime==0

	* Occupation group 1 in the year before the event
	gen occ_code=real(substr(ammattikoodi_1,1,1))
	gen manager_1=(occ_code==1)

	* Log earnings for matching purposes
	forvalues i=1(1)5 {
		gen log_realearn`i'=log(realAllEarn`i'+1)
	}

	* Earnings bands: 1 up to 20000, then 5000-wide bands 2 to 9 up to
	* 60000, and 10 above 60000
	forvalues i=1(1)5 {
		gen earning_level_`i'=1 if (realAllEarn`i'<=20000)
		forvalues b=2(1)9 {
			local lo=20000+5000*(`b'-2)
			local hi=`lo'+5000
			replace earning_level_`i'=`b' if (realAllEarn`i'<=`hi' & realAllEarn`i'>`lo')
		}
		replace earning_level_`i'=10 if (realAllEarn`i'>60000)
	}

	* teffects requires an outcome variable; a constant is used because
	* only the generated match indices are needed
	gen outcomes = 0

	sort shnro wp_crime

	* Do nearest neighbor matching
	teffects nnmatch (outcomes age gender level employed1 employed2 employed3 employed4 employed5 log_realearn1 log_realearn2 log_realearn3 log_realearn4 log_realearn5 earning_level_* cum_record_1 cum_record_2 cum_record_3 cum_record_4 cum_record_5 manager_1) (wp_crime), gen (match_id)

	* --- 5. Collect treated units and their matched controls --------------
	* match_id1 holds the observation number of the first match, so the
	* current row number is the key that links a control to its treated unit
	save "$dataout\complete", replace
	gen rownum=_n

	preserve
	keep if wp_crime==1 & !missing(match_id1)
	save "$dataout\treatment", replace
	restore

	* For controls the pair identifier is their own row number
	keep if wp_crime==0 & !missing(match_id1)
	drop match_id1
	rename rownum match_id1

	save "$dataout\control", replace

	* Keep only controls that were picked by a treated unit; a control
	* picked by several treated units appears once per treated unit
	merge 1:m match_id1 using "$dataout\treatment", keepusing (match_id1)
	keep if _merge==3
	drop _merge
	tab match_id1

	append using "$dataout\treatment"

	save "$dataout\allmatches_long_`Y'.dta", replace

}



// After finding the matches, link every matched unit to its outcomes from
// 5 years before to 5 years after the event year.
// For each event year Y and relative year a, one file
// allmatches_long_Y_k.dta is written, where k = Y + a is the calendar year.
forvalues Y=2006 (1) 2014 {

	forvalues a= -5(1)5 {

		use "$dataout\allmatches_long_`Y'.dta", clear

		* Keep the lagged workplace code of the defendant under a name that
		* survives the wildcard drop below
		rename sykstun_lag defendant_sykstun_lag

		* Remove the matching-period variables; fresh values for calendar
		* year k are merged in below
		drop ptoim1* tyrtu* tyotu* sykstun* ammattikoodi*

		* Relative time of this slice
		drop time
		gen time=`a'

		* Controls carry no event year: copy the largest observed value
		ereplace year_event=max(year_event)

		local k=`Y'+`a'

		if `k'<2017 {
			* Outcomes up to 2016 come from the FLEED files
			merge m:1 shnro using "${data}fleed_kokonais_`k'.dta", keepusing (shnro sykstun ptoim1 tyotu tyrtu ammattikoodi)
		}
		else {
			* Outcomes from 2017 onwards come from the FOLK extract, which
			* has no workplace code
			merge m:1 shnro using "$dataout\FOLK_emp_earnings_`k'", keepusing (shnro ptoim1 tyotu tyrtu ammattikoodi_k)

			* NOTE(review): the original renamed tyrtuo unconditionally,
			* which presumably relies on tyrtu in keepusing being resolved
			* as an abbreviation of tyrtuo. The rename is guarded so the
			* step also runs when the file already names the variable tyrtu.
			capture confirm variable tyrtuo, exact
			if _rc==0 {
				rename tyrtuo tyrtu
			}

			* NOTE(review): ammattikoodi_k is not harmonised with
			* ammattikoodi here; confirm whether the coding is comparable.
		}

		* Keep matched-sample rows only, with or without outcome data
		drop if _merge==2
		drop _merge

		* Flag the matched sample in every year. The original set this
		* flag only for years before 2017, leaving it missing afterwards.
		gen matched_sample=1

		save "$dataout\allmatches_long_`Y'_`k'.dta", replace
	}

}

// Combine all years together and create the matched-sample long panel.
// Steps: append every event-year by calendar-year file, rebuild the
// outcome variables, deflate earnings, and assign one identifier per
// treated-control pair. fmerge and ereplace are user-written commands.

clear
forvalues Y=2006 (1) 2014 {
	forvalues i=-5(1) 5 {
		local a=`Y'+`i'
		append using "$dataout\allmatches_long_`Y'_`a'.dta"
	}
}

* Calendar year implied by event year and relative time
replace year=year_event+time

* Drop the matching-period versions of the outcome variables
drop employed* allEarnings* realAllEarn*

* Employment
destring ptoim1, replace

* Up to 2016, employment requires activity code 11 and a workplace code.
* Both the empty string and a literal dot count as a missing workplace
* code. From 2017 on no workplace code is merged in, so only the
* activity code is used.
gen employed = (ptoim1==11 & !inlist(sykstun, "", ".") & year<=2016)
replace employed=1 if (ptoim1==11 & year>2016)

* Earnings: missing components count as zero
replace tyotu = 0 if missing(tyotu)
replace tyrtu = 0 if missing(tyrtu)

gen allEarnings =  tyotu + tyrtu

* Deflate earnings; years without a CPI entry are dropped
fmerge m:1 year using  "${dataout}\cpi"
keep if _merge == 3
drop _merge

gen realAllEarn = allEarnings/cpi
drop cpi

* Occupations: first digit of the occupation code
gen occ1=real(substr(ammattikoodi,1,1))

* Flags that exist only for treated units are zero for controls
replace defendant_switch=0 if defendant_switch==.
replace manager_sub=0 if manager_sub==.
replace manager_sub2=0 if manager_sub2==.

* ----------------------------------------------------------------------
* Pair identifier.
* match_id1 is a row number from the matching run of one event year, so
* the same value can occur in different event years, and a control that
* was picked by several treated units shares its value with all of them.
* Within each event year, match_id1 value, relative time and treatment
* status, rows are numbered in shnro order; the j-th control and the
* j-th treated row form pair j. This is the pairing the earlier
* hand-written offsets aimed for, but it works for any group size,
* cannot give two pairs the same identifier, and never mixes event years.
* victim_shnro only breaks ties between rows of the same person.
* ----------------------------------------------------------------------
bysort year_event match_id1 time wp_crime (shnro victim_shnro): gen pair_idx=_n
egen match_id_new=group(year_event match_id1 pair_idx)
replace match_id1=match_id_new
drop match_id_new pair_idx

* Diagnostic: every pair should have exactly two rows per relative year
bysort match_id1 time: gen temp=_N
tab temp

sort match_id1 time wp_crime shnro
save "$dataout\allmatches_allyears_matchpast5.dta", replace




